
# Data collection details

Twitter data capturing the conversation around the 2019–20 Australian bushfires and the COVID-19 pandemic was purchased from Twitter and made available for download on 3 October 2020.

* Period 1

* Date Range: `12/14/2019 - 1/25/2020` (month/day/year, i.e. 14 December 2019 to 25 January 2020)

* Ruleset: `profile_country:au (bushfire OR bushfires OR australianfires OR arson OR #climatechange OR "climate change" OR #climatemergency OR #scottyfrommarketing OR #liarfromtheshiar OR #climatehoax OR #australiaburns OR #australiaburning OR #climatecrisis OR #climateactionnow OR #itsthegreensfault OR #backburning)`

* Period 2

* Date Range: `3/2/2020 - 4/13/2020` (month/day/year, i.e. 2 March 2020 to 13 April 2020)

* Ruleset: `profile_country:au (covid OR coronavirus OR covid-19 OR #COVIDー19 OR pandemic OR WHO OR "world health organization" OR vaccine OR "social distancing" OR quarantine OR #plandemic OR #chinavirus OR wuhan OR #stayhome OR #MadeinChina OR #ChinaLiedPeopleDied OR 5G OR #chinacentric)`

\clearpage

# Data summary and statistics

\clearpage

```{r include = TRUE, results = 'asis'}

# Load the saved Period 1 tweet data. The file is expected to provide
# `bushfire_tweets.dt`, which this chunk reads immediately below.
load("data/bushfire_tweets.dt.RData")

# List the table's column names as a one-column LaTeX table.
names(bushfire_tweets.dt) %>%
  kableExtra::kbl(
    format = "latex",
    col.names = "Column names",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 1: Tweet data"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, fig.cap = "Period 1: Tweets by day"}

# Daily tweet volume for Period 1: count rows per calendar date, then plot
# one bar per day.
# NOTE(review): if `created_at` is POSIXct, as.Date() bins by UTC day unless
# `tz` is supplied -- confirm that is the intended day boundary.
bushfire_tweets.dt %>%
  dplyr::group_by(date = as.Date(created_at)) %>%
  dplyr::count() %>%
  ggplot(aes(x = date, y = n)) +
  # The counts are already aggregated, so draw them directly with geom_col()
  # rather than geom_histogram(stat = "identity"), which passes binning
  # parameters that the identity stat ignores with a warning.
  geom_col() +
  # NOTE(review): label_number_si() is deprecated in newer scales releases;
  # kept here so the chunk still runs on the scales version in use.
  scale_y_continuous(labels = scales::label_number_si())

```

```{r include = TRUE, results = 'asis'}

# Summarise the distinct (tweet_id, user_id) pairs in the Period 1 tweet
# table and render the summary() output as a LaTeX table.
bushfire_tweets.dt %>%
  dplyr::distinct(tweet_id, user_id) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 1: Distinct tweets"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, results = 'asis'}

# Load the saved Period 1 user data. The file is expected to provide
# `bushfire_users.dt`, which this chunk reads immediately below.
load("data/bushfire_users.dt.RData")

# List the table's column names as a one-column LaTeX table.
names(bushfire_users.dt) %>%
  kableExtra::kbl(
    format = "latex",
    col.names = "Column names",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 1: User data"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```


```{r include = TRUE, results = 'asis'}

# Summarise the distinct `user_id` values in the Period 1 user table and
# render the summary() output as a LaTeX table.
bushfire_users.dt %>%
  dplyr::distinct(user_id) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 1: Distinct users"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, results = 'asis'}

# Summarise the distinct `user_id` values found in the Period 1 tweet table
# (captioned as tweet authors, as opposed to the full user table) and render
# the summary() output as a LaTeX table.
bushfire_tweets.dt %>%
  dplyr::distinct(user_id) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    # Caption typo fixed: "exluding" -> "excluding".
    caption = "Period 1: Distinct author users (i.e. excluding reshared/mentioned users)"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, results = 'asis'}

# Load the saved Period 1 URL data. The file is expected to provide
# `bushfire_urls.dt`, which this chunk reads immediately below.
load("data/bushfire_urls.dt.RData")

# Summarise the distinct `url` values and render the summary() output as a
# LaTeX table.
bushfire_urls.dt %>%
  dplyr::distinct(url) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 1: Distinct urls"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, results = 'asis'}

# Load the saved Period 1 hashtag data. The file is expected to provide
# `bushfire_hashtags.dt`, which this chunk reads immediately below.
load("data/bushfire_hashtags.dt.RData")

# Summarise the distinct `hashtag` values and render the summary() output as
# a LaTeX table. distinct() compares the values as stored, so hashtags that
# differ only in letter case are kept as separate entries here.
bushfire_hashtags.dt %>%
  dplyr::distinct(hashtag) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 1: Distinct hashtags"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, results = 'asis'}

# Top 40 hashtags for Period 1, counted case-insensitively (hashtags are
# lower-cased before counting) and listed from most to least frequent.
bushfire_hashtags.dt %>%
  dplyr::group_by(hashtag = tolower(hashtag)) %>%
  dplyr::count() %>%
  dplyr::ungroup() %>%
  # top_n() keeps ties, so more than 40 rows can appear when counts are equal.
  dplyr::top_n(40, wt = n) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  kableExtra::kbl(
    # Explicit output format, matching the other table chunks in this file.
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    longtable = TRUE,
    col.names = c("hashtag", "frequency"),
    caption = "Period 1: Top 40 hashtags"
  ) %>%
  # "scale_down" is omitted: kableExtra cannot resize a longtable, so the
  # option had no effect here other than raising a warning.
  kableExtra::kable_styling(latex_options = c("striped", "repeat_header"))

```

```{r include = TRUE, results = 'asis'}

# Load the saved Period 2 tweet data. The file is expected to provide
# `covid_tweets.dt`, which this chunk reads immediately below.
load("data/covid_tweets.dt.RData")

# List the table's column names as a one-column LaTeX table.
names(covid_tweets.dt) %>%
  kableExtra::kbl(
    format = "latex",
    col.names = "Column names",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 2: Tweet data"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, fig.cap = "Period 2: Tweets by day"}

# Daily tweet volume for Period 2: count rows per calendar date, then plot
# one bar per day.
# NOTE(review): if `created_at` is POSIXct, as.Date() bins by UTC day unless
# `tz` is supplied -- confirm that is the intended day boundary.
covid_tweets.dt %>%
  dplyr::group_by(date = as.Date(created_at)) %>%
  dplyr::count() %>%
  ggplot(aes(x = date, y = n)) +
  # The counts are already aggregated, so draw them directly with geom_col()
  # rather than geom_histogram(stat = "identity"), which passes binning
  # parameters that the identity stat ignores with a warning.
  geom_col() +
  # NOTE(review): label_number_si() is deprecated in newer scales releases;
  # kept here so the chunk still runs on the scales version in use.
  scale_y_continuous(labels = scales::label_number_si())

```


```{r include = TRUE, results = 'asis'}

# Summarise the distinct (tweet_id, user_id) pairs in the Period 2 tweet
# table and render the summary() output as a LaTeX table.
covid_tweets.dt %>%
  dplyr::distinct(tweet_id, user_id) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 2: Distinct tweets"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")
```

```{r include = TRUE, results = 'asis'}

# Load the saved Period 2 user data. The file is expected to provide
# `covid_users.dt`, which this chunk reads immediately below.
load("data/covid_users.dt.RData")

# List the table's column names as a one-column LaTeX table.
names(covid_users.dt) %>%
  kableExtra::kbl(
    format = "latex",
    col.names = "Column names",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 2: User data"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```


```{r include = TRUE, results = 'asis'}

# Summarise the distinct `user_id` values in the Period 2 user table and
# render the summary() output as a LaTeX table.
covid_users.dt %>%
  dplyr::distinct(user_id) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 2: Distinct users"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, results = 'asis'}

# Summarise the distinct `user_id` values found in the Period 2 tweet table
# (captioned as tweet authors, as opposed to the full user table) and render
# the summary() output as a LaTeX table.
covid_tweets.dt %>%
  dplyr::distinct(user_id) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    # Caption typo fixed: "exluding" -> "excluding".
    caption = "Period 2: Distinct author users (i.e. excluding reshared/mentioned users)"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, results = 'asis'}

# Load the saved Period 2 URL data. The file is expected to provide
# `covid_urls.dt`, which this chunk reads immediately below.
load("data/covid_urls.dt.RData")

# Summarise the distinct `url` values and render the summary() output as a
# LaTeX table.
covid_urls.dt %>%
  dplyr::distinct(url) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 2: Distinct urls"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, results = 'asis'}

# Load the saved Period 2 hashtag data. The file is expected to provide
# `covid_hashtags.dt`, which this chunk reads immediately below.
load("data/covid_hashtags.dt.RData")

# Summarise the distinct `hashtag` values and render the summary() output as
# a LaTeX table. distinct() compares the values as stored, so hashtags that
# differ only in letter case are kept as separate entries here.
covid_hashtags.dt %>%
  dplyr::distinct(hashtag) %>%
  summary() %>%
  kableExtra::kbl(
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    caption = "Period 2: Distinct hashtags"
  ) %>%
  kableExtra::kable_styling(latex_options = "striped")

```

```{r include = TRUE, results = 'asis'}

# Top 40 hashtags for Period 2, counted case-insensitively (hashtags are
# lower-cased before counting) and listed from most to least frequent.
covid_hashtags.dt %>%
  dplyr::group_by(hashtag = tolower(hashtag)) %>%
  dplyr::count() %>%
  dplyr::ungroup() %>%
  # top_n() keeps ties, so more than 40 rows can appear when counts are equal.
  dplyr::top_n(40, wt = n) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  kableExtra::kbl(
    # Explicit output format, matching the other table chunks in this file.
    format = "latex",
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    booktabs = TRUE,
    longtable = TRUE,
    col.names = c("hashtag", "frequency"),
    caption = "Period 2: Top 40 hashtags"
  ) %>%
  # "scale_down" is omitted: kableExtra cannot resize a longtable, so the
  # option had no effect here other than raising a warning.
  kableExtra::kable_styling(
    latex_options = c("striped", "repeat_header"),
    font_size = 10
  )

```

